In [1]:
# Load the combined region-by-disease table.
# Strings are read as factors explicitly: the code that follows calls
# levels() on DT$Disease and DT$Region, and since R 4.0 read.table() no longer
# creates factors by default.
DT <- read.table("../Data/All_data.txt", stringsAsFactors = TRUE)
In [2]:
# Show the column names of the loaded table.
colnames(DT)
In [3]:
# Preview the first six rows of the table.
head(DT, n = 6L)
In [4]:
# List the disease categories (factor levels) present in the table.
levels(DT[["Disease"]])
In [5]:
# Derive display columns from Disease and Region by renaming factor levels.
# New labels are assigned POSITIONALLY to the existing levels, so each vector
# below must follow the order of levels(DT$Disease) / levels(DT$Region)
# (presumably alphabetical with the all-disease / all-region aggregate first;
# verify against the data).
# as.factor() guarantees a factor even when the columns were read as character
# (R >= 4.0 default); on a character vector `levels<-` would only attach an
# attribute and leave the values unrelabelled.

# Short, line-wrapped disease labels (the aggregate level gets an empty label)
DT$Dis_lab <- as.factor(DT$Disease)
levels(DT$Dis_lab) <- c(
  "",
  "Cardiovasc.\n& circulatory",
  "Chronic\nrespiratory",
  "Cirrhosis",
  "Congenital\nanomalies",
  "Diabetes, urin.\nmale infertility",
  "Common\ninfect. dis.",
  "Digestive\ndis.",
  "Gynecol.\ndis.",
  "Hemoglob. &\nhemolytic\nanemia",
  "Hepatitis",
  "HIV",
  "Leprosy",
  "Malaria",
  "Maternal\ndisorders",
  "Mental and\nbehavioral",
  "Musculosk.\ndisorders",
  "Neglected trop.\ndiseases.",
  "Neonatal\ndisorders",
  "Neoplasms",
  "Neurological\ndisorders",
  "Nutritional\ndeficiencies",
  "Oral\ndisorders",
  "Sense organ\ndiseases",
  "STD",
  "Skin and\nsubcutan.",
  "Sudden infant\ndeath synd.",
  "Tuberculosis"
)

# Full-length disease names
DT$Dis_tooltip <- as.factor(DT$Disease)
levels(DT$Dis_tooltip) <- c(
  "All diseases",
  "Cardiovascular and circulatory diseases",
  "Chronic respiratory diseases",
  "Cirrhosis of the liver",
  "Congenital anomalies",
  "Diabetes, urinary diseases and male infertility",
  "Common infectious diseases",
  "Digestive diseases",
  "Gynecological diseases",
  "Hemoglobinopathies and hemolytic anemias",
  "Hepatitis",
  "HIV",
  "Leprosy",
  "Malaria",
  "Maternal disorders",
  "Mental and behavioral disorders",
  "Musculoskeletal disorders",
  "Neglected tropical diseases",
  "Neonatal disorders",
  "Neoplasms",
  "Neurological disorders",
  "Nutritional deficiencies",
  "Oral disorders",
  "Sense organ diseases",
  "Sexually transmitted diseases excluding HIV",
  "Skin and subcutaneous diseases",
  "Sudden infant death syndrome",
  "Tuberculosis"
)

# Region display names
# NOTE(review): "Sub-Saharian" looks like a typo for "Sub-Saharan"; left
# unchanged because consumers of the exported file may match on this string.
DT$regs_lab <- as.factor(DT$Region)
levels(DT$regs_lab) <- c(
  "World",
  "Eastern Europe and Central Asia",
  "High-income countries",
  "Latin America and Caribbean",
  "Non-high-income countries",
  "North Africa and Middle East",
  "South Asia",
  "Southeast Asia, East Asia and Oceania",
  "Sub-Saharian Africa"
)
In [9]:
# Load the 27 GBD disease groups and attach the full-length tooltip names.
# Strings are read as factors explicitly (R >= 4.0 reads them as character),
# and as.factor() makes sure `levels<-` really relabels the values.
Mgbd <- read.table("../Data/27_gbd_groups.txt", stringsAsFactors = TRUE)
Mgbd$tooltip <- as.factor(Mgbd$x)
# Drop the first tooltip level ("All diseases"); the remaining names are
# assigned positionally to the levels of Mgbd$x.
levels(Mgbd$tooltip) <- levels(DT$Dis_tooltip)[-1]
Mgbd$tooltip
In [6]:
# Diseases for which the median number of RCTs is missing in DT.
with(DT, unique(Disease[is.na(Nb_RCTs_med)]))
In [7]:
library(data.table)
# Trial-level database (one row per RCT).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
data <- read.table(
  file = "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/database_RCTs_regions_27diseases.txt"
)
In [8]:
# For every trial, parse the "&"-separated GBD27 field into a numeric vector
# of disease-group indices.
Lgbd <- lapply(strsplit(as.character(data$GBD27), "&"), as.numeric)
In [9]:
# Region membership and per-region sample sizes for every trial.

# "&"-separated region list of each trial, split once and reused below
RGs <- strsplit(as.character(data$Regions), "&")
# Sorted distinct region names appearing in any trial
regs <- sort(unique(unlist(RGs)))

# One logical column per region (columns follow `regs`): TRUE when the trial
# lists that region. Exact membership is used instead of grep(), which treats
# the region name as a regular expression and also matches substrings.
LR <- lapply(regs, function(x) {
  vapply(RGs, function(trial_regs) x %in% trial_regs, logical(1))
})
LR <- do.call('cbind', LR)
LR <- data.table(LR)
LR$TrialID <- data$TrialID

# Nb of patients per region per trial
# Suppress sample sizes below 10 and above 200k
data$Sample[data$Sample < 10 | data$Sample > 200000] <- NA

# Nb of countries per region per trial, used to distribute the sample size
# equally across countries
nb_ctrs <- lapply(strsplit(as.character(data$Nb_ctr_per_reg), '&'), as.numeric)
# lengths() is type-stable on empty input, unlike sapply(nb_ctrs, length)
n_regs_per_trial <- lengths(nb_ctrs)

# Long table: one row per (trial, region)
pats <- data.frame(
  TrialID = rep(data$TrialID, n_regs_per_trial),
  Nb_ctrs = unlist(nb_ctrs),
  # Explicit factor with every region as a level, so per-region tabulations
  # keep regions with zero trials regardless of the R version's
  # stringsAsFactors default
  Region = factor(unlist(RGs), levels = regs),
  Tot_sample = rep(data$Sample, n_regs_per_trial)
)
pats$tot_ctrs <- rep(vapply(nb_ctrs, sum, numeric(1)), n_regs_per_trial)
# Share of the trial's sample attributed to the region, proportional to the
# number of countries of that region in the trial
pats$sample_per_reg <- pats$Tot_sample * pats$Nb_ctrs / pats$tot_ctrs
pats <- data.table(pats)
setkey(pats, TrialID)
In [10]:
# Row indices (in Mgbd) of the diseases whose median RCT count is missing in DT.
dis <- which(
  is.element(Mgbd$x, unique(DT$Disease[is.na(DT$Nb_RCTs_med)]))
)
dis
In [11]:
# For every disease index in `dis`, build a table with the number of trials
# (RCTs) and of patients per region, worldwide ("All") and outside
# high-income countries ("Non-HI"), both for the disease itself (Dis == "dis")
# and for all trials carrying a GBD mapping (Dis == "all").
# The tables are collected in the list `A`, in the order of `dis`.
A <- vector("list", length(dis))
# Rows of each output table: every region plus the two aggregate rows
out_regions <- c(sort(regs), "All", "Non-HI")
# seq_along() keeps the loop empty when `dis` is empty (1:length(dis) would
# iterate over c(1, 0))
for (i in seq_along(dis)) {
  d <- dis[i]
  repl <- data.table(
    TrialID = data$TrialID,
    # 1 when the trial's GBD codes include disease index d
    recl_dis = as.numeric(vapply(Lgbd, function(x) d %in% x, logical(1))),
    # 1 when the trial has a non-empty GBD code vector
    recl_gbd = as.numeric(lengths(Lgbd) > 0)
  )
  setkey(repl, TrialID)
  replpats <- merge(pats, repl)
  setkey(replpats, Region)

  # Subsets reused below
  dis_pats <- replpats[recl_dis == 1, ]
  gbd_pats <- replpats[recl_gbd >= 1, ]
  non_hi <- replpats[Region != "High-income", ]

  # Output data: one row per (region, Dis); sized from `regs` rather than a
  # hard-coded 9
  df <- data.table(
    Region = rep(out_regions, times = 2),
    Dis = rep(c("dis", "all"), each = length(out_regions)),
    RCTs = as.integer(0),
    Patients = as.numeric(0)
  )

  # Per region
  # Nb of trials per region. Tabulating on a factor carrying every region as
  # a level keeps regions with zero trials, so the result always has one
  # count per region, in the same order as the rows of df.
  dis_n <- as.integer(table(factor(dis_pats$Region, levels = sort(regs))))
  gbd_n <- as.integer(table(factor(gbd_pats$Region, levels = sort(regs))))
  df[Dis == "dis" & Region %in% regs, RCTs := dis_n]
  df[Dis == "all" & Region %in% regs, RCTs := gbd_n]
  # Nb of patients per region (keyed join on Region; regions without trials
  # sum to 0)
  dis_sample <- dis_pats[regs, sum(sample_per_reg, na.rm = TRUE), by = .EACHI]$V1
  gbd_sample <- gbd_pats[regs, sum(sample_per_reg, na.rm = TRUE), by = .EACHI]$V1
  df[Dis == "dis" & Region %in% regs, Patients := dis_sample]
  df[Dis == "all" & Region %in% regs, Patients := gbd_sample]

  # Worldwide
  # Nb of trials
  df[Dis == "dis" & Region == "All", RCTs := as.integer(sum(repl$recl_dis))]
  df[Dis == "all" & Region == "All", RCTs := sum(repl$recl_gbd >= 1)]
  # Nb of patients
  df[Dis == "dis" & Region == "All",
     Patients := sum(dis_pats$sample_per_reg, na.rm = TRUE)]
  df[Dis == "all" & Region == "All",
     Patients := sum(gbd_pats$sample_per_reg, na.rm = TRUE)]

  # Non-high-income countries
  # Nb of distinct trials with at least one non-high-income region
  df[Dis == "dis" & Region == "Non-HI",
     RCTs := non_hi[recl_dis == 1, ][!duplicated(TrialID), .N]]
  df[Dis == "all" & Region == "Non-HI",
     RCTs := non_hi[recl_gbd >= 1, ][!duplicated(TrialID), .N]]
  # Nb of patients enrolled in non-high-income regions
  df[Dis == "dis" & Region == "Non-HI",
     Patients := sum(non_hi[recl_dis == 1, sample_per_reg], na.rm = TRUE)]
  df[Dis == "all" & Region == "Non-HI",
     Patients := sum(non_hi[recl_gbd >= 1, sample_per_reg], na.rm = TRUE)]

  A[[i]] <- df
}
In [12]:
# For each disease in `dis`, summarise the table stored in A as 2.5% / 50% /
# 97.5% quantiles of the number of RCTs and patients per region, and of the
# disease's share (in %) among all GBD-mapped trials of the region. The
# per-disease results are stacked into `data_f`.
# (A[[i]] presumably holds a single value per region and Dis, in which case
# the three quantiles coincide.)
quant_rows <- vector("list", length(dis))
quant_probs <- c(0.025, 0.5, 0.975)
for (i in seq_along(dis)) {
  d <- dis[i]
  DF <- A[[i]]
  # Stored under a local name: the original assigned this to `data`, which
  # overwrote the trial database loaded earlier in the script.
  quants <- DF[Dis == "dis", ][, lapply(.SD, function(x) {
    quantile(x, probs = quant_probs)
  }),
  by = c("Region"),
  .SDcols = c("RCTs", "Patients")]
  dataprop <- DF[, lapply(.SD[Dis == "dis", ] / .SD[Dis == "all", ], function(x) {
    100 * quantile(x, probs = quant_probs)
  }),
  by = c("Region"),
  .SDcols = c("RCTs", "Patients")]
  # Three quantile rows per region are reshaped to one row with three columns
  df <- data.frame(cbind(cbind(unique(quants$Region), as.character(Mgbd$x[d])),
                         matrix(quants$RCTs, ncol = 3, byrow = TRUE),
                         matrix(quants$Patients, ncol = 3, byrow = TRUE),
                         matrix(dataprop$RCTs, ncol = 3, byrow = TRUE),
                         matrix(dataprop$Patients, ncol = 3, byrow = TRUE)))
  names(df) <- c("Region", "Disease",
                 paste(paste("Nb", "RCTs", sep = "_"), c("low", "med", "up"), sep = "_"),
                 paste(paste("Nb", "Patients", sep = "_"), c("low", "med", "up"), sep = "_"),
                 paste(paste("Prop", "RCTs", sep = "_"), c("low", "med", "up"), sep = "_"),
                 paste(paste("Prop", "Patients", sep = "_"), c("low", "med", "up"), sep = "_"))
  quant_rows[[i]] <- df
}
# Bind once instead of growing the data frame inside the loop
data_f <- if (length(quant_rows) > 0) do.call(rbind, quant_rows) else data.frame()
In [13]:
# Put both tables in the same (Region, Disease) order so that they can be
# matched row by row.
data_f <- data_f[with(data_f, order(as.character(Region), as.character(Disease))), ]
DT <- DT[with(DT, order(as.character(Region), as.character(Disease))), ]
# Tabulate whether the Region/Disease keys of data_f line up with the rows of
# DT for the recomputed diseases.
table(
  paste(data_f$Region, data_f$Disease) ==
    paste(DT$Region, DT$Disease)[DT$Disease %in% as.character(Mgbd$x[dis])]
)
In [14]:
# Compare the column names of the recomputed table and of DT.
colnames(data_f)
colnames(DT)
In [15]:
# Copy the recomputed medians into DT for the diseases listed in `dis`.
# Values are matched by position, so data_f and DT must be in the same
# (Region, Disease) order.
upd_rows <- DT$Disease %in% as.character(Mgbd$x[dis])
DT$Nb_RCTs_med[upd_rows] <-
  as.numeric(as.character(data_f$Nb_RCTs_med))
DT$Nb_Patients_med[upd_rows] <-
  as.numeric(as.character(data_f$Nb_Patients_med))
DT$Prop_loc_RCTs_med[upd_rows] <-
  as.numeric(as.character(data_f$Prop_RCTs_med))
DT$Prop_loc_Patients_med[upd_rows] <-
  as.numeric(as.character(data_f$Prop_Patients_med))
In [16]:
# Redefine `regs` as the sorted regions of the last per-disease table,
# without the worldwide aggregate.
regs <- setdiff(sort(unique(DF$Region)), "All")
regs
In [17]:
# For each disease in `dis`, express the regional numbers of RCTs and patients
# as a percentage of the worldwide total ("All") and of the non-high-income
# total ("Non-HI"), summarised as 2.5% / 50% / 97.5% quantiles per region.
# The per-disease results are stacked into `data_f`.
# NOTE(review): the worldwide denominators are not guarded against zero, and
# the non-high-income guard only looks at RCTs (not at Patients) - confirm
# these cases cannot occur in the data.
prop_rows <- vector("list", length(dis))
# seq_along() keeps the loop empty when `dis` is empty
for (i in seq_along(dis)) {
  d <- dis[i]
  DF <- A[[i]]
  DFr <- DF[DF$Region %in% regs & DF$Dis == "dis", ]
  # Denominators, repeated on every regional row
  DFr$RCTs_all <- rep(DF$RCTs[DF$Dis == "dis" & DF$Region == "All"], each = length(regs))
  DFr$RCTs_NHI <- rep(DF$RCTs[DF$Dis == "dis" & DF$Region == "Non-HI"], each = length(regs))
  DFr$Patients_all <- rep(DF$Patients[DF$Dis == "dis" & DF$Region == "All"], each = length(regs))
  DFr$Patients_NHI <- rep(DF$Patients[DF$Dis == "dis" & DF$Region == "Non-HI"], each = length(regs))
  # Shares of the worldwide totals; by() returns one result per region
  df <- data.frame(cbind(regs, as.character(Mgbd$x[d]),
                         do.call('rbind', by(DFr[DFr$RCTs_all != 0, ],
                                             DFr$Region[DFr$RCTs_all != 0],
                                             function(x) {
                                               100 * quantile(x$RCTs / x$RCTs_all, probs = c(0.025, 0.5, 0.975))
                                             })),
                         do.call('rbind', by(DFr[DFr$Patients_all != 0, ],
                                             DFr$Region[DFr$Patients_all != 0],
                                             function(x) {
                                               100 * quantile(x$Patients / x$Patients_all, probs = c(0.025, 0.5, 0.975))
                                             }))))
  if (sum(DFr$RCTs_NHI) != 0) {
    # Shares of the non-high-income totals
    df <- cbind(df, cbind(
      do.call('rbind', by(DFr[DFr$RCTs_NHI != 0, ],
                          DFr$Region[DFr$RCTs_NHI != 0],
                          function(x) {
                            100 * quantile(x$RCTs / x$RCTs_NHI, probs = c(0.025, 0.5, 0.975))
                          })),
      do.call('rbind', by(DFr[DFr$Patients_NHI != 0, ],
                          DFr$Region[DFr$Patients_NHI != 0],
                          function(x) {
                            100 * quantile(x$Patients / x$Patients_NHI, probs = c(0.025, 0.5, 0.975))
                          }))))
  } else {
    # No trial outside high-income countries: fill the six columns with zeros
    df <- cbind(df, matrix(0, nrow = length(regs), ncol = 3), matrix(0, nrow = length(regs), ncol = 3))
  }
  names(df) <- c("Region", "Disease",
                 paste(paste("Prop_all", "RCTs", sep = "_"), c("low", "med", "up"), sep = "_"),
                 paste(paste("Prop_all", "Patients", sep = "_"), c("low", "med", "up"), sep = "_"),
                 paste(paste("Prop_NHI", "RCTs", sep = "_"), c("low", "med", "up"), sep = "_"),
                 paste(paste("Prop_NHI", "Patients", sep = "_"), c("low", "med", "up"), sep = "_"))
  prop_rows[[i]] <- df
}
# Bind once instead of growing the data frame inside the loop
data_f <- if (length(prop_rows) > 0) do.call(rbind, prop_rows) else data.frame()
In [18]:
# Sort the recomputed table by (Region, Disease) and tabulate whether its keys
# line up with the rows of DT for the recomputed diseases, excluding the
# worldwide rows.
data_f <- data_f[with(data_f, order(as.character(Region), as.character(Disease))), ]
table(
  paste(data_f$Region, data_f$Disease) ==
    paste(DT$Region, DT$Disease)[
      DT$Disease %in% as.character(Mgbd$x[dis]) & DT$Region != "All"
    ]
)
In [19]:
# Compare the column names of the recomputed table and of DT.
colnames(data_f)
colnames(DT)
In [20]:
# Copy the recomputed median shares into DT for the diseases listed in `dis`,
# on every row except the worldwide ("All") ones. Values are matched by
# position, so data_f and DT must be in the same (Region, Disease) order.
upd_rows_glob <- DT$Disease %in% as.character(Mgbd$x[dis]) & DT$Region != "All"
DT$Prop_glob_RCTs_med[upd_rows_glob] <-
  as.numeric(as.character(data_f$Prop_all_RCTs_med))
DT$Prop_glob_Patients_med[upd_rows_glob] <-
  as.numeric(as.character(data_f$Prop_all_Patients_med))
DT$Prop_NHI_RCTs_med[upd_rows_glob] <-
  as.numeric(as.character(data_f$Prop_NHI_RCTs_med))
DT$Prop_NHI_Patients_med[upd_rows_glob] <-
  as.numeric(as.character(data_f$Prop_NHI_Patients_med))
In [21]:
# Spot-check the first rows of the updated table for one disease.
head(DT[DT$Disease == "Leprosy", ], n = 6L)
In [22]:
# Export the completed table for the interactive figure.
write.table(x = DT, file = "../Interactive_figure/data/data.txt")
In [23]:
# Load the alignment-ratio table.
ratio_align <- read.table(
  file = "../Data/Alignment_ratios_within_regions_across_diseases_wt_sims_patients_metrs_burdens.txt"
)
In [24]:
# Export the alignment ratios unchanged for the interactive figure.
write.table(x = ratio_align, file = "../Interactive_figure/data/data_ratios.txt")
In [ ]: